# Silence warnings for the rest of the session (a negative `warn` makes R
# ignore every warning, so later problems will not be printed)
options(warn = -1)

# Install groundhog package only when it is not already available, so that
# re-running the script does not download and reinstall it every time
if (!requireNamespace("groundhog", quietly = TRUE)) {
    install.packages("groundhog")
}

# Load groundhog package
library("groundhog")

# List needed packages
packages <- c(
    "dplyr", "estimatr", "fixest", "ggplot2", "ggtext", "marginaleffects",
    "modelsummary", "patchwork", "readr", "scales", "skimr", "stringr",
    "tidyr"
)

# Load package versions from replication date
groundhog.library(packages, "2024-04-22")

# Generate a clustered bootstrap sample.
#
# Draws `n` cluster IDs from `unique_ids` with replacement and stacks, in draw
# order, every row of `data` whose ID column equals each drawn ID.
#
# Arguments:
#   data        Data frame holding one or more rows per cluster.
#   unique_ids  Vector of cluster IDs to sample from.
#   id_name     Name (string) of the column in `data` holding the cluster ID.
#   n           Number of clusters to draw.
#
# Returns a plain data.frame of the stacked rows plus a factor column
# `boot_id` identifying the draw each row came from. A cluster that is drawn
# more than once therefore counts as several distinct bootstrap clusters.
gen_clustered_boot_sample <- function(data, unique_ids, id_name, n) {
    data <- as.data.frame(data)
    stopifnot(
        is.character(id_name),
        length(id_name) == 1L,
        id_name %in% names(data)
    )

    # Generate random sample of IDs (with replacement). Sampling positions
    # rather than values avoids sample()'s special case that treats a single
    # number k as the range 1:k, and draws the same values as sample() would
    # whenever there is more than one ID.
    random_ids <- unique_ids[
        sample.int(length(unique_ids), size = n, replace = TRUE)
    ]

    # Look up the ID column once instead of once per draw
    og_ids <- data[[id_name]]

    # Find the rows belonging to each drawn ID, keeping draw order
    rows_by_draw <- lapply(
        X = random_ids,
        FUN = function(random_id) {
            which(og_ids == random_id)
        }
    )

    # Generate bootstrap data
    boot_rows <- unlist(rows_by_draw, use.names = FALSE)
    boot_data <- data[boot_rows, , drop = FALSE]
    rownames(boot_data) <- NULL

    # Generate bootstrap IDs from the draw position. Comparing each row's
    # original ID with the previous row's would merge the same cluster drawn
    # twice in a row into a single bootstrap cluster.
    boot_data$boot_id <- factor(
        rep(
            x = seq_along(rows_by_draw),
            times = lengths(rows_by_draw)
        )
    )

    # Return boot sample
    boot_data
}

# Set seed so that random draws made after this point (such as the ID
# resampling inside gen_clustered_boot_sample) are reproducible across runs
set.seed(8675309)
